/*** TEST SCORE RANK-RANK DISTRIBUTION REGRESSIONS ***/

* Session setup: pager setting, clean memory, path globals, log file.
* NOTE(review): "set more 1" is legacy syntax, presumably equivalent to
* "set more off" -- confirm on the Stata version in use.
set more 1
clear all

* presumably defines the LOGFILES and WORKING globals used below -- confirm in paths.do
do "paths.do"

* Close any log left open by an earlier run, then start a fresh one.
capture log close
* Use a forward slash as the directory separator: Stata accepts it on every
* operating system, whereas a backslash only works on Windows, and all other
* paths in this file already use forward slashes.
log using "$LOGFILES/track_mob_2.log", replace

*--------------------------------------------------
* STEP 0 -- cleaning and simple transformations
*--------------------------------------------------

* Start from the student-level input file.
use "$WORKING/track_mob_C.dta", clear

* Diagnostic only: bin base_pctfix into steps of 0.05 (labelled 0, 5, 10, ...)
* and tabulate the bins against each snomath flag. The bin variable is temporary.
gen tmp1 = floor(base_pctfix * 20) * 5
forvalues k = 0/4 {
	tab tmp1 snomath`k'
}
drop tmp1

* Row means of each classroom and peer measure over suffixes 0-4 (_48),
* suffixes 0-1 (_45) and suffixes 2-4 (_68).
local peer_measures obs_cls peer_prev peer_init sdpeer_prev sdpeer_init
foreach m of local peer_measures {
	egen mob_`m'_48 = rowmean(`m'0 `m'1 `m'2 `m'3 `m'4)
	egen mob_`m'_45 = rowmean(`m'0 `m'1)
	egen mob_`m'_68 = rowmean(`m'2 `m'3 `m'4)
}

global AVAILVARS has_year*

* The class-size outcomes are the obs_cls row means under a shorter name.
foreach w in 48 45 68 {
	gen mob_clssz_`w' = mob_obs_cls_`w'
}

* Per-year outcomes: filled score percentile and the record-present flag.
forvalues t = 0/4 {
	gen mob_scoreresid`t' = mscore_fill_pct`t'
	gen mob_has_year`t' = has_year`t'
}

* Outcome name lists (without the mob_ prefix). OUTSHORT is the full list used
* by every later loop: scores, attrition flags, then each OUT2 measure with
* its _48, _45 and _68 variants.
global OUT1 scoreresid1 scoreresid2 scoreresid3 scoreresid4
global OUT2 clssz peer_prev peer_init sdpeer_prev sdpeer_init
global OUT3 has_year1 has_year2 has_year3 has_year4
global OUTSHORT $OUT1 $OUT3
foreach m in $OUT2 {
	global OUTSHORT $OUTSHORT `m'_48 `m'_45 `m'_68
}


*--------------------------------------------------
* STEP 1 -- campus-cohort level slopes and intercepts
*--------------------------------------------------

* Grouping keys, stored as string suffixes so that the later reshape wide
* can append them to variable names.
* score_half: _hi when base_pctfix is above 0.5, otherwise _lo
gen score_half = cond(base_pctfix > 0.5, "_hi", "_lo")

* score_mid: _edge for the bottom and top quarters, otherwise _mid
gen score_mid = cond((base_pctfix <= 0.25) | (base_pctfix > 0.75), "_edge", "_mid")

* Per-student ingredients for a closed-form bivariate OLS of each outcome on
* base_pctfix. Once summed within a group they give n, sum x, sum x^2,
* sum y and sum xy; the n_ indicator restricts every term to rows where the
* outcome is observed.
foreach out in $OUTSHORT {
	gen n_`out' = (mob_`out' != .)
	gen x_`out' = base_pctfix * n_`out'
	gen x2_`out' = (base_pctfix ^ 2) * n_`out'
	gen y_`out' = mob_`out' * n_`out'
	gen xy_`out' = (mob_`out' * base_pctfix) * n_`out'
}

* Student-level file reused by the three collapses that follow.
save "$WORKING/temp/track_mob_2-0.dta", replace

*
* bottom and top half: separate linear fits on each side of base_pctfix = 0.5
*
use "$WORKING/temp/track_mob_2-0.dta", clear

* Sum the OLS ingredients within campus x cohort x half, and carry along the
* cell means of over8 and under8.
collapse (sum) n_* x_* x2_* y_* xy_* ///
		(mean) mu_over8=over8 mu_under8=under8, ///
		by(base_campus base_year score_half)

foreach out in $OUTSHORT {
	* (sum x)^2 - n * sum(x^2): negative when x varies within the cell
	gen ols_den = (x_`out' ^ 2) - (n_`out' * x2_`out')

	* intercept and slope, left missing when there is no variation in x
	gen b0_`out' = ((x_`out' * xy_`out') - (y_`out' * x2_`out')) / ols_den if (ols_den < 0)
	gen b1_`out' = ((x_`out' * y_`out') - (n_`out' * xy_`out')) / ols_den if (ols_den < 0)

	* fitted outcome at baseline percentiles 0.25 and 0.75
	gen pred_25_`out' = b0_`out' + (b1_`out' * 0.25)
	gen pred_75_`out' = b0_`out' + (b1_`out' * 0.75)

	drop ols_den
}

* One row per campus x cohort, with _lo and _hi appended to every variable name.
keep base_campus base_year score_half n_* b0_* b1_* pred_25_* pred_75_* mu_*
reshape wide n_* b0_* b1_* pred_25_* pred_75_* mu_*, i(base_campus base_year) j(score_half) string
save "$WORKING/temp/track_mob_2-1.dta", replace

*
* middle half - 25 pctile to 75 pctile (the remaining students form the edge group)
*
use "$WORKING/temp/track_mob_2-0.dta", clear

* Sum the OLS ingredients within campus x cohort x mid/edge group, and carry
* along the cell means of over8 and under8.
collapse (sum) n_* x_* x2_* y_* xy_* ///
		(mean) mu_over8=over8 mu_under8=under8, ///
		by(base_campus base_year score_mid)

foreach out in $OUTSHORT {
	* (sum x)^2 - n * sum(x^2): negative when x varies within the cell
	gen ols_den = (x_`out' ^ 2) - (n_`out' * x2_`out')

	* intercept and slope, left missing when there is no variation in x
	gen b0_`out' = ((x_`out' * xy_`out') - (y_`out' * x2_`out')) / ols_den if (ols_den < 0)
	gen b1_`out' = ((x_`out' * y_`out') - (n_`out' * xy_`out')) / ols_den if (ols_den < 0)

	* fitted outcome at baseline percentile 0.5
	gen pred_50_`out' = b0_`out' + (b1_`out' * 0.5)

	drop ols_den
}

* One row per campus x cohort, with _mid and _edge appended to every variable name.
keep base_campus base_year score_mid n_* b0_* b1_* pred_50_* mu_*
reshape wide n_* b0_* b1_* pred_50_* mu_*, i(base_campus base_year) j(score_mid) string
save "$WORKING/temp/track_mob_2-1a.dta", replace

*
* all together: one linear fit per campus x cohort using every student
*
use "$WORKING/temp/track_mob_2-0.dta", clear

collapse (sum) n_* x_* x2_* y_* xy_* ///
		(mean) mu_over8=over8 mu_under8=under8, ///
		by(base_campus base_year)

foreach out in $OUTSHORT {
	* The denominator and the sample-size guard are kept as macro text so they
	* are substituted verbatim into both formulas below.
	local ols_den "((x_`out' ^ 2) - (n_`out' * x2_`out'))"
	local enough "(n_`out' > 1)"

	* intercept and slope, computed only when more than one observation exists
	gen b0_`out'_all = ((x_`out' * xy_`out') - (y_`out' * x2_`out')) / ///
			`ols_den' if `enough'
	gen b1_`out'_all = ((x_`out' * y_`out') - (n_`out' * xy_`out')) / ///
			`ols_den' if `enough'

	* fitted outcome at baseline percentiles 0.25 and 0.75
	gen pred_25_`out'_all = b0_`out'_all + (b1_`out'_all * 0.25)
	gen pred_75_`out'_all = b0_`out'_all + (b1_`out'_all * 0.75)
}

save "$WORKING/temp/track_mob_2-2.dta", replace

* Attach the half-sample fits (suffixes _lo and _hi) and the middle/edge fits
* (suffixes _mid and _edge) to the pooled fits currently in memory. Every
* campus x cohort must be present in all three files.
merge 1:1 base_campus base_year using "$WORKING/temp/track_mob_2-1.dta"
assert (_merge == 3)
drop _merge
merge 1:1 base_campus base_year using "$WORKING/temp/track_mob_2-1a.dta"
assert (_merge == 3)
drop _merge

foreach yy in $OUTSHORT {
	*** predicted percentile scores, each taken from the fit estimated on the
	*** part of the distribution that contains the evaluation point
	gen pred_25_`yy'_half = pred_25_`yy'_lo
	gen pred_75_`yy'_half = pred_75_`yy'_hi
	*** percentile 0.5 lies in the middle half (0.25 to 0.75), so use the _mid
	*** fit rather than the _edge fit; this also matches the mu_*_mid variables
	*** that are later renamed to pred_50_*_half
	gen pred_50_`yy'_half = pred_50_`yy'_mid
}

save "$WORKING/temp/track_mob_2-3.dta", replace


/*
Inputs needed for each campus-cohort:
  - mean and sd of the base score
  - average tracking in elementary and in middle grades
  - fraction of students with records in each grade
*/
use "$WORKING/track_mob_C.dta", clear

* constant used to count students when collapsing
gen one0 = 1

* On track in year k: a record exists and the grade equals k + 4
* (grade 4 in year 0 through grade 8 in year 4).
forvalues k = 0/4 {
	gen has_on_track`k' = has_year`k' & (grade`k' == (`k' + 4))
}

* has_allyears flags students who are on track in all five years.
egen tmp1 = rowtotal(has_on_track*)
gen has_allyears = (tmp1 == 5)
drop tmp1

* Two restricted copies of each tracking measure, each with a non-missing flag:
*   _am : value kept only for students flagged has_allyears
*   _bm : value kept only for students on track in that particular year
forvalues k = 0/4 {
	gen has_bm`k' = (has_year`k' & (grade`k' == (`k' + 4)))
	foreach trk in trk_unadj trk_rel {
		gen `trk'_am`k' = `trk'_dm`k' if has_allyears
		gen has_`trk'_am`k' = (`trk'_am`k' != .)
		gen `trk'_bm`k' = `trk'_dm`k' if has_bm`k'
		gen has_`trk'_bm`k' = (`trk'_bm`k' != .)
	}
}
save "$WORKING/temp/track_mob_2-4.dta", replace


** calculate campus and district means separately and merge.

** campuses
** Collapse the student file to one row per campus x district x cohort:
**   (mean) base score, the _dm tracking measures, has_year flags and the
**          plain tracking variables (renamed to _nm below)
**   (p10/p25/p75/p90, sd) distribution of the base score
**   (sum)  student counts plus sums of the _am and _bm tracking measures and
**          of their non-missing flags; these sums are turned into means below
use "$WORKING/temp/track_mob_2-4.dta", clear
collapse (mean) cy_score_mn=base_pctfix trk_*_dm* has_year* ///
		trk_unadj0 trk_unadj1 trk_unadj2 trk_unadj3 trk_unadj4 trk_rel0 trk_rel1 trk_rel2 trk_rel3 trk_rel4 ///
		(p10) cy_score_p10=base_pctfix (p25) cy_score_p25=base_pctfix ///
		(p75) cy_score_p75=base_pctfix (p90) cy_score_p90=base_pctfix ///
		(sum) obs=one0 obs_am=has_allyears has_bm* trk_*_am* has_trk_*_am* trk_*_bm* has_trk_*_bm* ///
		(sd) cy_score_sd=base_pctfix, by(base_campus base_district base_year)
forvalues gg = 0(1)4 {
	foreach vv in trk_unadj trk_rel {
		** collapse (sum) returns 0 when every input is missing, so reset the
		** sum to missing when no student contributed a value
		replace `vv'_am`gg' = . if (has_`vv'_am`gg' == 0)
		** convert the sum to a mean
		** NOTE(review): the divisor is the count of has_allyears students, not
		** the count of non-missing values; the two agree only if the _dm
		** measure is never missing for those students -- confirm
		replace `vv'_am`gg' = `vv'_am`gg' / obs_am
		replace `vv'_bm`gg' = . if (has_`vv'_bm`gg' == 0)
		** NOTE(review): same caveat, the divisor is the on-track student count
		replace `vv'_bm`gg' = `vv'_bm`gg' / has_bm`gg'
		
		** give the plain measure an _nm suffix so all four variants share a pattern
		rename `vv'`gg' `vv'_nm`gg'
	}
}
** Average the year-level measures into elementary (suffixes 0-1) and middle
** (suffixes 2-4) values for each variant, and flag any campus-cohort where a
** year-level measure of any variant is missing.
gen missing_any_tracking = 0
foreach yg in nm dm bm am {
	foreach vv in trk_unadj trk_rel {
		egen `vv'_elem_cy_`yg' = rowmean(`vv'_`yg'0 `vv'_`yg'1)
		egen `vv'_mid_cy_`yg' = rowmean(`vv'_`yg'2 `vv'_`yg'3 `vv'_`yg'4)
		foreach gg in 0 1 2 3 4 {
			replace missing_any_tracking = 1 if (`vv'_`yg'`gg' == .)
		}
	}
}
** the year-level _am and _bm variables and the helper counts are not kept
keep base_campus base_district base_year obs cy_score* trk_*_cy_* trk_*_dm* trk_*_nm* has_year* missing_any_tracking
order base_campus base_district base_year obs cy_score* trk_*_cy_* trk_*_dm* trk_*_nm* has_year* missing_any_tracking
save "$WORKING/temp/track_mob_2-5.dta", replace


** districts
** Same construction as the campus-level means, but collapsed to one row per
** district x cohort and without the base-score percentiles. Only the
** elementary and middle averages (suffix _dy_) are kept.
use "$WORKING/temp/track_mob_2-4.dta", clear
collapse (mean) cy_score_mn=base_pctfix trk_*_dm* has_year* ///
		trk_unadj0 trk_unadj1 trk_unadj2 trk_unadj3 trk_unadj4 trk_rel0 trk_rel1 trk_rel2 trk_rel3 trk_rel4 ///
		(sum) obs=one0 obs_am=has_allyears has_bm* trk_*_am* has_trk_*_am* trk_*_bm* has_trk_*_bm* ///
		(sd) cy_score_sd=base_pctfix, by(base_district base_year)
forvalues gg = 0(1)4 {
	foreach vv in trk_unadj trk_rel {
		** collapse (sum) returns 0 when every input is missing, so reset the
		** sum to missing when no student contributed a value
		replace `vv'_am`gg' = . if (has_`vv'_am`gg' == 0)
		** convert the sum to a mean
		** NOTE(review): the divisor is the count of has_allyears students, not
		** the count of non-missing values -- confirm these always agree
		replace `vv'_am`gg' = `vv'_am`gg' / obs_am
		replace `vv'_bm`gg' = . if (has_`vv'_bm`gg' == 0)
		** NOTE(review): same caveat, the divisor is the on-track student count
		replace `vv'_bm`gg' = `vv'_bm`gg' / has_bm`gg'
		
		** give the plain measure an _nm suffix so all four variants share a pattern
		rename `vv'`gg' `vv'_nm`gg'
	}
}
** elementary (suffixes 0-1) and middle (suffixes 2-4) averages per variant
foreach yg in nm dm bm am {
	foreach vv in trk_unadj trk_rel {
		egen `vv'_elem_dy_`yg' = rowmean(`vv'_`yg'0 `vv'_`yg'1)
		egen `vv'_mid_dy_`yg' = rowmean(`vv'_`yg'2 `vv'_`yg'3 `vv'_`yg'4)
	}
}
keep base_district base_year trk_*_dy_*
order base_district base_year trk_*_dy_*
save "$WORKING/temp/track_mob_2-6.dta", replace


** Combine campus-level and district-level tracking measures; every campus
** row must find its district x cohort row.
use "$WORKING/temp/track_mob_2-5.dta", clear
merge m:1 base_district base_year using "$WORKING/temp/track_mob_2-6.dta"
assert _merge == 3
drop _merge
save "$WORKING/temp/track_mob_2-7.dta", replace

** Add the mobility coefficients and predictions, again requiring a full match.
merge 1:1 base_campus base_year using "$WORKING/temp/track_mob_2-3.dta"
assert _merge == 3
drop _merge

** Expose the group means of over8 and under8 under the same naming pattern
** as the regression-based predictions, so later loops can treat them alike.
foreach m in over8 under8 {
	rename mu_`m'_hi pred_75_`m'_half
	rename mu_`m'_lo pred_25_`m'_half
	rename mu_`m'_mid pred_50_`m'_half
}

** samp0 marks campus-cohorts where every outcome has all three predictions
gen samp0 = 1
foreach out in $OUTSHORT over8 under8 {
	foreach pct in 25 75 50 {
		replace samp0 = 0 if pred_`pct'_`out'_half == .
	}
}
** dropping cohorts where any tracking measure is missing
replace samp0 = 0 if missing_any_tracking

save "$WORKING/track_mob_D.dta", replace

*************************************************
*** step 2: regress mobility on tracking ***
*************************************************

estimates clear
use "$WORKING/track_mob_D.dta", clear

** variable lists used by the regressions further down
global CTRLVARS1 cy_score_mn cy_score_sd
global CTRLVARS2 has_year1 has_year2 has_year3 has_year4
global TREAT_VARS trk_elem_x trk_mid_x
global INSTR_VARS trk_elem_z trk_mid_z
global CTRLFLEX cy_score_p10 cy_score_p25 cy_score_p75 cy_score_p90
global TREAT_VARS2 trk_elem_x trk_mid_x trk_elem_x2 trk_mid_x2
global INSTR_VARS2 trk_elem_z trk_mid_z trk_elem_z2 trk_mid_z2

** build subsamples

** samp0: no missing measures
keep if samp0

** samp1: only 2011 and 2015
gen samp1 = inlist(base_year, 2011, 2015)

** samp2: only contains campuses with big tracking changes between 2011 and 2015

** size average across 2011 and 2015
egen bc_obs = mean(obs) if samp1, by(base_campus)
** group by size (ten groups numbered 0 to 9)
egen obs_dec_grp = cut(bc_obs) if samp1, group(10)

foreach xx in unadj rel {
	** combine grade-level tracking instruments into one weighted average.
	** The weights sum to one, so the two terms must be added, not multiplied.
	** NOTE(review): 0.4 and 0.6 presumably reflect the two elementary and
	** three middle years that enter the elem and mid averages -- confirm
	gen trk_z = (0.4 * trk_`xx'_elem_cy_am) + (0.6 * trk_`xx'_mid_cy_am)
	
	** difference across 2011 and 2015: with both years present, twice the
	** deviation from the two-year campus mean equals the 2015 minus 2011 change
	** once the sign is flipped for the 2011 row
	egen tmp1 = mean(trk_z) if samp1, by(base_campus)
	gen tmp2 = (2 * (trk_z - tmp1))
	gen trk_diff = tmp2
	replace trk_diff = -tmp2 if (base_year == 2011)
	
	** group tracking instrument tails, within size groups
	** (three groups numbered 0 to 2 inside each size group)
	gen trk_`xx'_zd_grp = .
	forvalues i=0(1)9 {
		egen tmp3_`i' = cut(trk_diff) if samp1 & (obs_dec_grp == `i'), group(3)
		replace trk_`xx'_zd_grp = tmp3_`i' if samp1 & (obs_dec_grp == `i')
		** drop by full name so this does not depend on variable abbreviation
		drop tmp3_`i'
	}
	** keep the bottom and top groups, i.e. everything except the middle group
	gen samp2_`xx' = samp1 & (trk_`xx'_zd_grp != 1)
	
	drop tmp* trk_z trk_diff
}


** samp3: only STAAR cohorts 2013, 2014, and 2015
gen samp3 = inlist(base_year, 2013, 2014, 2015)

save "$WORKING/track_mob_E.dta", replace


** actual regressions begin here
** For every outcome this runs OLS and IV fixed-effects regressions over both
** tracking measures, four subsamples and three prediction points, then writes
** all stored estimates for that outcome to one CSV file.
use "$WORKING/track_mob_E.dta", clear

foreach yy in over8 under8 $OUTSHORT {
	disp "`yy'"
	** default controls: score moments plus the attrition (has_year) shares
	global CTRLVARS $CTRLVARS1 $CTRLVARS2
	** when the outcome is itself an attrition share, leave those controls out
	if (substr("`yy'", 1, 8) == "has_year") {
			disp "is an attrition variable"
			global CTRLVARS $CTRLVARS1
	}
	
	foreach xx in unadj rel {
		** treatment variables: campus-level dm tracking measure and its square
	    capture drop trk_elem_x trk_mid_x trk_elem_x2 trk_mid_x2
	    gen trk_elem_x = trk_`xx'_elem_cy_dm
		gen trk_mid_x = trk_`xx'_mid_cy_dm
		
		gen trk_elem_x2 = trk_elem_x ^ 2
		gen trk_mid_x2 = trk_mid_x ^ 2
		
		** samp2 is specific to the tracking measure, so rebuild the generic name
		capture drop samp2
		gen samp2 = samp2_`xx'
		
		** subsamples
		foreach ss in samp0 samp1 samp2 samp3 {
			foreach pp in 25 75 50 {
				
				** OLS
				** weighted by cohort size, with year and campus fixed effects
				** and standard errors clustered by district
				reghdfe pred_`pp'_`yy'_half $CTRLVARS $CTRLFLEX $TREAT_VARS if `ss' [aw=obs], absorb(base_year base_campus) vce(cluster base_district)
				eststo, title("mob_`pp'_`yy'::trk_`xx'_cy_dm::yc::flex::`ss'")
				
				** bm includes only variation not driven by attrition
				foreach yg in bm am {
					** instrument aggregation level: campus-year (cy) or district-year (dy)
					foreach yygg in cy dy {
						di "mob_`pp'_`yy'::trk_`xx'_`yygg'_`yg'::yc::`ss'"
						
						capture drop trk_elem_z trk_mid_z trk_elem_z2 trk_mid_z2
						
						** instruments: the selected variant and level, plus squares
						gen trk_elem_z = trk_`xx'_elem_`yygg'_`yg'
						gen trk_mid_z = trk_`xx'_mid_`yygg'_`yg'
						gen trk_elem_z2 = trk_elem_z ^ 2
						gen trk_mid_z2 = trk_mid_z ^ 2
						
						** instrumental variables
						ivreghdfe pred_`pp'_`yy'_half $CTRLVARS $CTRLFLEX ( $TREAT_VARS = $INSTR_VARS ) ///
							if `ss' [aw=obs], absorb(base_year base_campus) vce(cluster base_district)
						eststo, title("mob_`pp'_`yy'::trk_`xx'_`yygg'_`yg'::yc::flex::`ss'")
						
						** quadratic measures and instruments
						ivreghdfe pred_`pp'_`yy'_half $CTRLVARS $CTRLFLEX ( $TREAT_VARS2 = $INSTR_VARS2 )  ///
							if `ss' [aw=obs], absorb(base_year base_campus) vce(cluster base_district)
						eststo, title("mob_`pp'_`yy'::trk_`xx'_`yygg'_`yg'::yc::sq::`ss'")
					}
				}
			}
		}
	}
	
	** one CSV per outcome, columns labelled by the eststo titles; stored
	** estimates are then cleared before the next outcome
	esttab using "$WORKING/track_mob_2-`yy'.csv", csv b(%9.5f) se(%9.5f) r2 ar2 ///
			scalars(N_clust) sfmt(%9.8g) nostar wide mlabels(,titles) replace
	estimates clear
}



log close







